{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 05 Two-way frequency tables and Venn diagrams" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%%html\n", "" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import plotly.graph_objects as go\n", "import seaborn as sns\n", "from matplotlib_venn import venn2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import findspark\n", "\n", "findspark.init()\n", "from pyspark.context import SparkContext\n", "from pyspark.sql.session import SparkSession\n", "\n", "spark = SparkSession.builder.appName(\"statistics\").master(\"local\").getOrCreate()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[khanacademy](https://www.khanacademy.org/math/ap-statistics/analyzing-categorical-ap/stats-two-way-tables/v/two-way-frequency-tables-and-venn-diagrams?modal=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![Two-way frequency tables and Venn diagrams fig 1](./imgs/01-04-01.png)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "dataset = {\n", " \"chocolate\": [\n", " \"yes\",\n", " \"yes\",\n", " \"yes\",\n", " \"yes\",\n", " \"yes\",\n", " \"yes\",\n", " \"yes\",\n", " \"yes\",\n", " \"yes\",\n", " \"no\",\n", " \"no\",\n", " \"no\",\n", " ],\n", " \"coconut\": [\n", " \"yes\",\n", " \"yes\",\n", " \"yes\",\n", " \"no\",\n", " \"no\",\n", " \"no\",\n", " \"no\",\n", " \"no\",\n", " \"no\",\n", " \"yes\",\n", " \"no\",\n", " \"no\",\n", " ],\n", "}" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
chocolatecoconut
0yesyes
1yesyes
2yesyes
3yesno
4yesno
5yesno
6yesno
7yesno
8yesno
9noyes
10nono
11nono
\n", "
" ], "text/plain": [ " chocolate coconut\n", "0 yes yes\n", "1 yes yes\n", "2 yes yes\n", "3 yes no\n", "4 yes no\n", "5 yes no\n", "6 yes no\n", "7 yes no\n", "8 yes no\n", "9 no yes\n", "10 no no\n", "11 no no" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(dataset)\n", "df" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+---------+-------+\n", "|chocolate|coconut|\n", "+---------+-------+\n", "| yes| yes|\n", "| yes| yes|\n", "| yes| yes|\n", "| yes| no|\n", "| yes| no|\n", "| yes| no|\n", "| yes| no|\n", "| yes| no|\n", "| yes| no|\n", "| no| yes|\n", "| no| no|\n", "| no| no|\n", "+---------+-------+\n", "\n" ] } ], "source": [ "sdf = spark.createDataFrame(zip(*dataset.values()), schema=list(dataset.keys()))\n", "# Expose the Spark DataFrame to spark.sql() under the name sdf_table.\n", "# createOrReplaceTempView replaces registerTempTable, deprecated since Spark 2.0.\n", "sdf.createOrReplaceTempView(\"sdf_table\")\n", "sdf.show()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "chocolate\n", "no 3\n", "yes 9\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby([\"chocolate\"]).size()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+---------+-----+\n", "|chocolate|count|\n", "+---------+-----+\n", "| no| 3|\n", "| yes| 9|\n", "+---------+-----+\n", "\n" ] } ], "source": [ "sdf.groupby(\"chocolate\").count().show()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+---------+-----+\n", "|chocolate|count|\n", "+---------+-----+\n", "| no| 3|\n", "| yes| 9|\n", "+---------+-----+\n", "\n" ] } ], "source": [ "spark.sql(\n", " \"select chocolate, count(*) as count from sdf_table group by chocolate\"\n", ").show()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { 
"scrolled": true }, "outputs": [ { "data": { "text/plain": [ "coconut\n", "no 8\n", "yes 4\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby([\"coconut\"]).size()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+-------+-----+\n", "|coconut|count|\n", "+-------+-----+\n", "| no| 8|\n", "| yes| 4|\n", "+-------+-----+\n", "\n" ] } ], "source": [ "sdf.groupby(\"coconut\").count().show()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+-------+-----+\n", "|coconut|count|\n", "+-------+-----+\n", "| no| 8|\n", "| yes| 4|\n", "+-------+-----+\n", "\n" ] } ], "source": [ "spark.sql(\"select coconut, count(*) as count from sdf_table group by coconut\").show()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "chocolate coconut\n", "no no 2\n", " yes 1\n", "yes no 6\n", " yes 3\n", "dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby([\"chocolate\", \"coconut\"]).size()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+---------+-------+-----+\n", "|chocolate|coconut|count|\n", "+---------+-------+-----+\n", "| no| no| 2|\n", "| no| yes| 1|\n", "| yes| yes| 3|\n", "| yes| no| 6|\n", "+---------+-------+-----+\n", "\n" ] } ], "source": [ "sdf.groupby(\"chocolate\", \"coconut\").count().show()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+---------+-------+-----+\n", "|chocolate|coconut|count|\n", "+---------+-------+-----+\n", "| no| no| 2|\n", "| no| yes| 1|\n", "| yes| yes| 3|\n", "| yes| no| 6|\n", 
"+---------+-------+-----+\n", "\n" ] } ], "source": [ "spark.sql(\n", " \"select chocolate, coconut, count(*) as count from sdf_table group by chocolate, coconut\"\n", ").show()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
coconutnoyesAll
chocolate
no213
yes639
All8412
\n", "
" ], "text/plain": [ "coconut no yes All\n", "chocolate \n", "no 2 1 3\n", "yes 6 3 9\n", "All 8 4 12" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(df[\"chocolate\"], df[\"coconut\"], margins=True)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+-----------------+---+---+\n", "|chocolate_coconut| no|yes|\n", "+-----------------+---+---+\n", "| yes| 6| 3|\n", "| no| 2| 1|\n", "+-----------------+---+---+\n", "\n" ] } ], "source": [ "sdf.crosstab(\"chocolate\", \"coconut\").show()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQUAAADrCAYAAABgr4PXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3deZxcZZ3v8c9TXV29d6fTJ+ms3SFkIZElFQgMAVzGFRWXO4LrIF5Er4MoIy6lZCxLRWr0uo6zvOaKzqjjdgdFZ7woKDqMgCNgiUAwZO10kk4n1fta1VX1zB/nJLerzdJLnfOcU/V7v171Ir1UPb9uur/9nOc8i9JaI4QQJ4RMFyCE8BcJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRJGw6QJE8CVT6RD2z5ICsrGoJecGBJiScx/EdMlUuhnoACyg9RSPFqAaOwRCQBV2GJyggSwwCYwBI8AoMAgcA44CPUBvLGr1u/8VibmSUKhQyVS6FjgH6MQOgU7n0eJhGRmgFzgE7AaeBZ6NRa1JD2sQM0goVIhkKt0EXAA8B9gMrMWfY0oa6MYOiF3Ak7GoddhsSZVFQqFMJVNpBWwCLgW2YIeAOuOT/KsHeMx5PBmLWlOG6ylrEgplxAmCzcAVzmOx2YpcMQk8ATwKPByLWiOG6yk7EgplIJlKbwKuonyD4HRywOPAA8BvYlErZ7iesiChEFDOQOHzgVcAa4wW4w/DwC+An8aiVrfpYoJMQiFgkqn0Suwg+FOgwXA5fvUkcHcsaj1uupAgklAIiGQqfSFwLXARwR0w9Np+4G7gP2NRq2C6mKCQUPC5ZCq9GXgL9u1EMT+9wA+A+2NRK2u6GL+TUPCpZCq9EXgzEDVdSxkZAL6JHQ7yg38aEgo+k0ylO4AbgG2GSyln+4G7YlHrCdOF+JGEgk8kU+kG4E3Yg4hVhsupFI8CX41FrUOmC/ETCQUfSKbSLwBuxNt1B8KWB34MfEPWXNgkFAxKptLLgZux7ygIs3qBL8ai1pOmCzFNQsGQZCr9KuCtQMR0LeIkDdwLfK2Sew0SCh5LptItwF8CF5uuRZxWL/ClWNT6velCTJBQ8FAylb4YuBVYZLoWcVYa+BHwT5W2pkJCwQPJVL
oa+zbjNchsxKDZBfx1LGodN12IVyQUXJZMpS1gB3Cu6VrEvI0An4lFrZTpQrwgoeCiZCq9HjsQKmk5c7kqAF+PRa27TRfiNgkFlyRT6SuxBxTl7kJ5eRD4Qjnv/iSh4IJkKv0G7NmJMn5Qnp4EPhmLWuOmC3GDhEIJJVPpMPbdheeZrkW4bh8Qj0WtQdOFlJqEQok4dxg+jCxkqiQ92MHQY7qQUpJQKIFkKh0Bbge2mq5FeG4A+Fgsau0zXUipSCgsUDKVrgH+Clm/UMnGgB2xqLXHdCGlIKGwAM7mqXHgfNO1CONGgFgsah00XchCSSjMk9ND+AT2gStCAPQDH4pFraOmC1kIPx4b5nvJVLoKiCGBIIotBj6ZTKXbTBeyENJTmIfk4+n3VmdDl9VOVI1XZ0JTobzSoQK6Kq+0/W+FykO+WjMVKYRykULVVLUO5yKF6qnqQk0uoutQEshlrBv4cCxqDZkuZD4kFM4ioRKLsY9iWw+s3nDjddWtF2y8QKHmvWWaRk9NRfRgpj43MdaU06MtuZrxptyifLWuLVnhwrTd2GMMgds9Omy6AL9JqMQy7ANZNzuP9hMfW/bcS7sXX3De6oW2oVDVkaxaEslGaBqM2H9XgHyVHh5vmhocWJINDVrZtny1rltoW8KY9cC7gc+ZLmSupKcAJFRiOXCl81h7qs9p2bi297x3vtlSofn3EOZCowvZ2sKxwSXZTHrZZGu2rtDsRbui5L4ai1o/MF3EXFRsKCRUogl4CfaU5HPO9LmR1paRLbffHA6Fw8b+ck/W5o/2dkzk+tszK2Q8IlAK2JObArPsuuJCIaESHcCrgBcwmxWMShW23P7uY7VW6zK3a5uNfEiPDLRn+no6xpfkarScJRkMo8D7gjIdumJCIaESW4H/wRxnHp5z3Su62rdf3OlOVfOn0fnxptyRg+vHGicb862m6xFndRC4LQgbwpb9QGNCJdZjb4V24Vyf27Jxbe/Sy7cueGDRDQpV1TBSvfq837YURhZNHTy4cWzxVE2h0XRd4rQ6gLcDXzZdyNmUbU/BuYtwPfbg4Zz3Naiqr53cGr91qqom0lTy4lyg0VP97ZnDh9aNrShUycYuPnZHLGr92nQRZ1J2oZBQiWrgjcBrWUBP6PzbbjrUuHr5qpIV5pGC0uNHOyfSvR0THaZrEac0Atwci1oDpgs5nbIaxU6oxAbgi8C1LCAQVr7kqq4gBgJASKv6FQfqOzY92nKkOhMaNV2P+CNN2KeC+VZZ9BSc3sGbsXsHCwq6SGvLSHTHLTWqKhT4LnhB6YlD68b6+pZnAhlwZe7zsaj1gOkiTiXwPYWESqwEvgD8GSX4eja87XVD5RAIACGt6jp2N65a/7vm7qqc8v2od4V5h3NamO8EOhQSKrEN+Cz2yO6CtZ6/saexY2XZ/VVtHK5e/Zz/ap1qGApXzIEmAdCAfZao7wTy8iGhEgp4PSXcMVlVhfIXf+K24XB9Xdne89foqe71Y71yOeEbGnvuwm7ThUwXuJ5CQiUi2HsZvJkSbqHe8aoXd5dzIIC9EGv17oaVq3Y3HDBdiwDsn993JFNpXx0FEKhQSKhELZAAtpfydSOtLSPLrty2opSv6VcKpZb01K5Z+1RTF5rgdRPLz3nYU+59IzChkFCJBuCTuLAf4oYbymdwcbZa+iOdG1Ith1WBvOlaBG9NptK+WSYfiFBIqEQz8ClgY6lfu2ltx/HGzvIbXJyNhtHwqvVPNB+RHoNxi7HvnvmC70PBWeJ8J6fZ52Ch1vzZy8r2TMDZaBipXr326abA70BcBq5JptK+WPXq61BwBhU/SoluOc7UsGp5X/2K9uVuvHaQtPRHOjt2NXSZrqPC1QPXmC4CfBwKzm3HD2IPxLhizbVXTyilfDXya0pbb23nin31EgxmvdoPYwu+DQXgRuAyt168rt0aaOxYudKt1w+ipYdqO6wjNd2m66hgjcArTBfhy1BIqMSLgVe72cY51758VHoJxR
RKrdrT0F47VuXbFXwV4DXOQUPG+C4UnO3S/pebbdRYrUNN53ZWxLyEuVKoyLrfNxdUnooegDWoBXipyQJ8FQrOwOIHmM3eiQvQ+eqXDCrlza7MQVQ9FWpbu7PpiOk6KtjVJhv3VSgAbwPWuNmAClflFm1a1372z6xszQORThlfMGZVMpU2dmixb0IhoRKXAq90u50ll17UEwpXyUlMs7Bqb0N7zXho0HQdFcrYJYQvQiGhEvXALV60tey5l5X9ZrWlorSKnLOzadx0HRVqezKVNrI/qC9CAXsJ9CK3G4ksah6ta7d8cX5DUNSNh1e09kZkfMF7EQwtlDIeCs7dBtcvGwBW/On2PrkNOXer9zQ0yN0II15molHjoQC8E/DkToB1yQVyHuM8VOVDLav2NUhvwXurk6m0K2t+zsRoKCRU4krmcUjLfDSvX9Nb7puouKmtp2ZFZCI0bLqOCnSF1w0aCwVnbcObvGpv2VXbZOPSBVCo6s5djUOm66hAJd1QaDZM9hQuBzw7kq153Rq5dFighuHwSrlF6blVyVTa06MLTYbCdV41VNO2qKw3ZPWKQoVW7W2QSwjvXeplY0ZCIaESFwPnetWetfV8WeBTIk0D1SvDGTVmuo4Ks83Lxkz1FF7nZWOtF5wn6xxKRKGqVhyoT5uuo8JsSqbSnp0o7nkoOCc6eTevOxTK169otzxrrwK0HqtZogrkTNdRQULAJi8b89oLvWys+dyOtKx1KK2QVvVtR2t6TNdRYcozFJzbkJ5O3bQuvmDCy/YqRdvRWpkZ6q3yDAXgIsDTrnzzuZ3SS3BB3WhVeyhP1nQdFWRDMpX2ZDGf16Hg6aUDQKS12Zcn+wadQlW39tb0mq6jgkTw6I6dZ6GQUIkQcIlX7QFEWltGQ+Gw8d1xy5UllxBe8+QSwsuewnrs3Wo909S5UibauEguITxX8hPSTsXLUIh62BYATWs75AfWRQpVveh4zTHTdVQQT4439DIUtnrYFgANHStk0pLLmvurZb6Cd1Z4cWy9J6HgnBi9wYu2pqtb0ubp5Uolqh8J15uuoYJEgCVuN+JVT2E9Hm2kcoIKh6eq6mvlzoPLIplQK3KcvZdcP9XMq1A4x6N2Tqprt0aUUn7YWaqsKVR1w2hYFpx5x/VxhbINhZrWZhlk9EjTQLWsmvSO9BTmK9LaIhuNeqRxsFrmK3jH9YOMXA+FhEqE8ehWynQ1i1oKXrdZqWomQzKV3Duu7yDmRU9hOeD5ASyRRU3a6zYrVVUuJLNGvVMWoWBkG7RIS5PMUfBIKE89Gglhb7h+apQXoeD6yU+nEm5skFDwiEJVhbNKlqh7oyGZSrv6s122PYVwfZ2rx9mLYjWTVXIHwjuu9ha8uNafc0/hW3zroi662uuoy9zKrf8xn0araiI183meKROZ0dAtn710ez6fDeUL+dC2TS87cst1f/+s6bpmKzIZyo4FaKrYaP/x8LdjN13U172/CaV4zUc+88R5V744KPMtmgHXttr3IhTm/KMSJdp9OZcf+CE/3DLfRlVIBeryoSZSX/j8rQ890lTfms/mJtXNn9l6xaM77z22bfPVgThnoToTCtSsxn/92HvOX3fZ847f9I/3PD6VmVSZsZEg/by4OrDrxeXDnG9XbWJTfwMNC5t8FLCDZEMqRFN9ax5gKpcN5fO5QJ2FG9IqMAONY4P94cPP/L7t+W9770GA6ppa3bh4SZAWdrn6e+tFT8HQVOMA/UY5cvkpbrpz83P7Bg83PG/r6w9csullgeglAKgAzQrp3fNMfV1TS+afb33LlnTXnualazcOXvvxLz9d19QSlN5O4AcajYRCENc9hKuq+dqO3Q/+c3z//fsOP7Ho6f0PuX77qVSUDk4G53M5lT64t2X7G2488P4f/ubBSG1d/idf/Pg603XNgas/21784hjpVmqtA9Odnam1qT23ac3lfY/8/oeuL5MtlQBdPdC2es1kw6K2yQ3bXzgIcOFLX9tzdM/OAA
2T4mq/zItQMNQl0wHq0MLxge7IwEhvGGB8ciT09L5fWR3LNo+armu2dID6ZYtXdmYaFy+ZOPR0qgHg2YcfsKyOc0dM1zUHrv5OeTGmMOcBw6/z9a099LRNMhn5NJ9+0eVcvusqruqe04sErKfQ23+g5nPfvjFa0HmltebSzS8/8pLLbgjMVmdaEZzrB+CaD37qqe/ueNfWfC4Xalm6fOxNf33XE6ZrmgNX/+B5EQpDc33C9Vz/24U2qgvB6imcf+5VI1/d8eyDpuuYr6lIIUB9BVh7yRXDt/3g1/9puo55yrj54l78jzQygl7ITrn6jRPFsrX5QE0WCzhXdykv21DITUzIJiseytQWZKWkdwIfCkamjk6NjAdpMkqgaXRhqqYgG7h6YyIWtVz92S7fUBgeCcpElMArhBhHmZqkVnFcv0vixf/IHlweLT2V7KAcDuWVfLggy6a9M+eB+7lyPRTiOp4FjrjdzkyZweEgLXAJtKmawqTpGipIWfQUAPZ71M5Jmf4hz7eAq1SjLblA3f4NONfnrpRtKGQHh2STFY8Mt2blzoN3DrvdgFehcMCjdk6aONbXFOT1D0Gh0fmx5pyR3bUqVNmEwrN4vDCqkMlG8pMZGW102VRE9+sqqk3XUUEOud2AJ6EQ1/EhYJ8XbU03mR4I0iKXQBpvysmdB+/kgF63G/Hy3nLKw7YAGOs+IhOYXDbSmg3UQqiA64lFLdcHdcs6FEb2d0u31kUanR9Ykg3Mng9lwPXxBPA2FHYCnt7PHtnf3ehle5Vmsj7fm6/WcmScd3Z70YhnoRDX8Rwe9xYy6YHmQj4vqyVd0t+ekcszb+30ohGv56v/wuP21NTQiOvTQiuRRuf7lmXk0sE7Oey7eK7zOhQexeVlnzONHDg07mV7lSJTl+/NV2uZtOSdvbGo5cl2AJ6GgnMJMa8Tn+ar77dPyTWvC/qWyaWDx57xqiETy11/7mVjg3/Ya+lCYcrLNsudVjqTXj65zHQdFaZ8QyGu43vxcCKTzuXDE73p4161VwmG2rJHC2FkbYl3NB4NMoKx05v4Vy8bG3hyl/QUSkSj9ZE143M+NFgsyJ5Y1PJsW0NTofArPJqIAZB+/Mlmr9oqd+NNucOZ+kKQDk4pB4962ZiRUIjruAbu9qq9id50qyyOKo1D68bkjoP3yj8UHL8APLvWH9nfbWSvyHIyUZ87PN6UbzNdR4U5Hotae7xs0FgoOLcnv+dVe70PPS4DYwug0fmu80all+C9h71u0PQOvD/FozsRA0/tWpbPZGUp9TwNWdlDE435xabrqEAPed2g0VBwxhb+AW82YFH9TzzT50E7ZacQ0mMHN4y1m66jAh0D/uB1o6Z7CsR1/Bk8WhNx+GcPya20eTiyZrwvH5bVkAbcF4tanm8paDwUHF8DXF+jMHksvSjTP+j6zjXlJFuTP3Z85eRq03VUoDxwv4mGfREKcR0fBL7iRVtHf/WYLKWeJY3O7980GiJgx8yXicdiUavfRMO+CAWAuI7fjz2pyVW9Dz22TNZCzM6xVZOHxptzluk6KtRPTDXsm1BwfBmXD7soZLKR4b0He9xsoxyMN+QOH1k73mm6jgqVBh431bivQiGu42PAZ3H57Mmue+5rlDMhTi9fpYf3XDgsk5TM+amJAcYTfBUKAHEd3wl8y802xg8fXTx+uNfz8y2DQKPz+54zPCl7LxozAfy7yQJ8FwoAcR3/LvCgm20cuPtemeF4CsdWTR4aXZRbarqOCvbjWNQaNVmAL0PB8UVcnLgxsr97yfjR4zK2MM2AlemScQSjMsA9povwbSg4R9h/HBeXWHd9/ye+/fq9NtIydfDAptEO03VUuHtjUcv4RsO+/qWI6/gI8FHAlenJQ8/ub59MDxx147WDZLwhd3jPhcOrZD6CUVng+6aLAJ+HAkBcx48BH8alW5Vd99xX0XchMrX5o89Gh9pR/v9ZKHP3xaKWL5b3B+IHIa7jPcCHgJKPAQw8tWv5xL
G+ihxbyNbke/9w8eBiHSJsupYKNwZ8x3QRJwQiFADiOp4GYkB3qV9799f+b60u6HypX9fPxhtyh3deMri4UCUbsPrAt/0wlnBCYEIBIK7j/djBsKuUrzvec6y173dPlzxs/Gq4Ndu1a+vQcl2FHMBrXjeG5yXMFKhQAIjr+DB2MNxXytfd951/W5HPZI3eH3abRuveVRNdey8Y6ZQxBN/4x1jU8lUvVQV5tm9CJa4G3gGluSZeun1r99rrXlmWy4QLSk92bRztG1yaXWm6FnHSr2NR6w7TRcwU6L8WcR2/F7gdKMmo7bGHf7t68nh/2d2izNTmjz6zbXBKAsFXsni0XcBcBToU4ORaiXdTomnRz371ezXlMuio0bneVRMHdm4bbM/WFppM1yOKfDMWtXy54U+gLx9mSqjE5cBfAAvadu2c615xoH37xWtKUpQhU9WFvr3nDzMhW7L70TPAh0yuhDyTsgoFgIRKNAHvBJ437xdRqrBlx7uP17a1Bm6zUo2e6l+aOdy9YWyVzD/wpQzwnljU8u0q3bILhRMSKrEZeBtw3nyeH2ltGdly+83hUDgciLMONFqPtuQOdW0cXTQllwp+9rexqGVsV6XZKNtQOCGhEtuB64E5D7JZ2y48vO7Nr/H94NxkXb6n67yRiJze5Hv/FYtanzRdxNkEfqDxbOI6/jBwM/C3zHHFZfrR368ceHp3lyuFlUC2Jn9s3+aRnme2DS6XQPC9NPAl00XMRtn3FKZLqIQCLgauAaJw9lWBKhye2hp/73B1U4Mvfuk0empk0VRPz5qJetlUNTCyQCwWtXabLmQ2KioUpkuoxGrgFcCVwBmPVq9fvnTggg+8o1GFQsamBeeqCoN9yzNDvR0T7XIwS+B8Lha1PDnwqBQqNhROSKhECLgAuALYzmkCYsllWw6tfcM1K5VSnu05kK8qDA23Tg32LcvUjrROLZX9DgLpnljUust0EXNR8aEwnRMQzwG2AJuBDfD/VxF2vvalB5Y/77I1brWv0blMXeH4UFs2M7Ak0yRzDAIvBXwsFrVc3Z281CQUziChEmFgHXZArAdWbX739bp53ZpzFvraBaXHs7WFoYmGXHasOafGmnMNE425RTpE1UJfW/hCD/A+05uwzoeEwhz9w33Docn6/AfDWbWudjw8FcmE8qECOpRXhAqKUF4pVUCFCkrlw7qQqy7oqUhB5SI6NBUpVE1FCtWZ2nx9rkY3mP5ahGv6sQcWA7l5j4TCPCRT6VrgU9i9ByGmG8EOhIOmC5mvsp+n4IZY1JoEEsAh07UIXxkH4kEOBJBQmDdn+6wPA4H+ARAlkwU+EZS5CGciobAAsag1CHwEOGC4FGFWDvhULGo9ZbqQUpAxhRJIptJNwCeAc03XIjw3iR0IKdOFlIqEQokkU+kG7BOtNpiuRXhmGHseQuAvGaaTUCihZCpdD+zAniEpytsx4KOxqOXasYamSCiUWDKVDmPv/vRi07UI13Rh32Vw5ThD0yQUXJJMpV+LvcmLrFcoL08BdwRxpuJsSSiUgFJqDbAfqNZa5068P5lKXwp8AJBVjeXhx8BXYlErd9bPDDAJhRI4XSgAJFPpNdgnZy/xvDBRKlPA38ei1v2mC/GC7+cpKKVWK6W+r5Q6rpTqU0p9WSkVUkrtUEp1KaWOKaW+rpRqmfacK5VSDyulBpVS3UqpG5z3tzife9x57g6lVMj52A1KqV8ppf63UmpAKbVfKXX1tNc8oJR60bS3P6aU+qbz5ont5QeVUqNKqctPfF4sah0A3gs87Nb3SLjqKPD+SgkE8HkoKKWqsM/Z6wLWYO+z+B3gBufxAmAt0Ah82XlOB3Av8DfYf523AL9zXvJvsPdLWIu92/P12Nf9J1yGfU6lBXwauGuW+yc81/nvIq11o9b6kekfjEWtkVjUuhN7S7jMbL524QuPALfGotY+04V4ydeXD85f3B8By6d3y5VSPwfu1lr/nfP2RuwBoDrsa/hLtdavnf
FaVdhz06Na653O+94JvFFr/XynN7FDa73O+Vg99hHhy7XWR5VSB4C3a61/5nz8Y8A6rfVbznT5MFMylV7l1Lh2vt8X4box7DMeHzBdiAm+7ikAq4GuU/yircDuPZzQhX2eZLvznL2neC0Le8OUmc+bvlvzySPjtNbjzj8b51X5acSi1iHgNuAewL+JXLkeBf6iUgMBSnQwq4u6gQ6lVHhGMBwBOqe93YE9/7zXec6lp3itNPaAUSewc9rzZjv5ZAyon/b2smn/ntMvtzN6fVcylX4E+8i7sjzUNmBGgf9TyWFwgt97Cr/B3sEmqZRqUErVKqWuAL4N/KVS6hylVCP23gbfdYLjX4AXKaWuU0qFlVJtSqktWus88D3gDqVUk1KqE3gf8M1TN/1Hfge8QSlVrZS6BHjdtI8dBwrM8ZIgFrV2Au8BvoG9yk6Y8WvgZgkEm69DwflFvgZ7S7SD2PsXvB74KvYv0oPY1/KTwC3Ocw4CL8fuovdj/zJf5LzkLdh/8fcBvwK+5bzWbPwV9oKnAey9FL41rc5x4A7gIeeOx5/M9muMRa1cLGp9D3gX9g+n8M4+4PZY1LojFrX6TRfjF74eaKxEyVQ6CvxP7Lstwh392D3En/n1kFeTJBR8KJlKK+wt59+EjDeUUgb4AXC3s3uWOAUJBR9zwuG5wBuZx1mY4qRx7LkrP5LLhLOTUAiAZCodwp6odS0SDnMxgD3P5d5Y1BozXUxQSCgEiNNzuAh4JbANnw8UG9QDfB/4eSxqTZkuJmgkFAIqmUovAa4GXsJZzsKsEDnsW9j3Ab+VAcT5k1AIuGQqXY19BuZVwFbA2CG4huwDHgB+6eywLRZIQqGMONvBXYp9knY5B8Q+7OnIDwb9jAU/klAoU8lUug47IC4DLiTYlxiT2JPQHgMeK9dt0PxCQqECOAOUndiDlM/BPjDXzyExBuzBXsb+JPC0DBh6R0KhQiVT6ZXY07Y7nUcH9iIvr/eUnMCevr4beBY7CA7LQKE5EgripGQqXYM9g7IDe6n5IqB1xqNuDi85hb36cBQYwl7F2oO9RP0ocFQGB/1HQkHMSTKVjmAPYFbNeISxexlZ7L/+mVjUkpWfASShIIQoIjPihBBFJBSEEEUkFIQQRSQUhBBFJBSEEEUkFIQQRSQUhKtOHMfncZu/VEq93cs2y4mEgqhoM88IFRIKQogZJBREyZzqhPBpHzvdad4rlFI/Ukr1K6X2KKVumvaxKqXUR5RSe5VSI0qpx5VSq52PbVdKPaqUGnL+u/00NZ2rlHrAqSetlPoXpdQi52PfwF7n8W/OaeEfdN7/J9NOLX9CKfV8V75hfqW1loc8FvzAXv/wBPB5oAGoxd7s5QbshVE3OZ/zLuxj/05Msf8P4O+cz9+CfdrWC52PfQB76fRG7HUVFwFtwGLsTVn/HHvNxRudt9uc5/0S+zBgsA8SejFQg30K+YPAF6bVfQB40bS3VwJ92AcKhZzn9gFLTH+PPft/aboAeZTHA7jc+YUOz3j/DcCeaW/XY5+9uQx7RWYeaJr28TuBf3L+vQt49Sna+nPgNzPe9whwg/Pvk6Fwiue+BkhNe3tmKHwI+MaM5/wUeKvp77FXD78fMCuC43QnhMOM07yVUmCf5t0G9GutR6Z9bhdwybTXPNUJ4jNPHT/xvD/a/l4ptRT4EvYelk3Yf/0HzvB1dALXKqWumfa+auAXZ3hOWZExBVEqJ08In8NzjgCLlVJN0943/STwbuyNYE71vM4Z7zvdCeJ3YvdMLtRaNwNvoXgjmZnLhLuxewqLpj0atNbJWX1FZUBCQZTK6U4IPy2tdTfwMHCn8/kXAjdinxwO8BXgE0qp9cp2ocKrzhkAAADRSURBVFKqDfh/wAal1Juck8Vfj73F3L+fopkm7E1eBpVSK7HHKabrpfi08G8C1yilXuoMdNYqpZ6vlFo1l29GkEkoiJLQpz8h/GzeiH2Y7hHscx7jWuv7nY99Dvge9lkOw8
BdQJ3Wug/7QJzbsAcBPwi8UmudPsXrJ7B3th4Cfox9SMx0dwI7nDsN73eC6tXAR7DHSLqxg6RifldkkxUhRJGKST8hxOxIKAghikgoCCGKSCgIIYpIKAghikgoCCGKSCgIIYpIKAghikgoCCGK/DdP6D/JFLaOJgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# venn2 takes subset sizes as (only A, only B, A and B); here A = coconut, B = chocolate.\n", "# Look the counts up by label instead of slicing the groupby result by position.\n", "flavor_counts = pd.crosstab(df[\"chocolate\"], df[\"coconut\"])\n", "venn2(\n", "    subsets=(\n", "        flavor_counts.loc[\"no\", \"yes\"],  # coconut only\n", "        flavor_counts.loc[\"yes\", \"no\"],  # chocolate only\n", "        flavor_counts.loc[\"yes\", \"yes\"],  # both\n", "    ),\n", "    set_labels=(\"coconut\", \"chocolate\"),\n", "    set_colors=(\"purple\", \"skyblue\"),\n", "    alpha=0.7,\n", ");" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 4 }